Wavelet Analysis of spectral data

This document presents the steps and results of a wavelet analysis from the raw data to the final plot presented in Thor et al.

Initial Curve Inspection

Below we present the raw curves of fluorescence intensity over time, split by cell-number, tech-rep, and genotype. These intensity curves are very noisy with an intensity decay as time progresses.

library(magrittr)
library(ggplot2)
library(dplyr)
library(tidyr)
library(purrr)
library(WaveletComp)
library(MESS)


devtools::load_all("../fluorpeaks")
## Loading fluorpeaks
d <- read_peaks("../data/raw_data.xlsx") %>% rename_kathrin()
d
## # A tibble: 174,202 x 9
##    minutes start_time line  cell_number tech_rep bio_rep reporter intensity
##      <dbl>      <dbl> <chr> <chr>       <chr>    <chr>   <chr>        <dbl>
##  1   0           9.83 WT    1           1        1       YFP           700.
##  2   0.333       9.83 WT    1           1        1       YFP           658.
##  3   0.667       9.83 WT    1           1        1       YFP           658.
##  4   1           9.83 WT    1           1        1       YFP           661.
##  5   1.33        9.83 WT    1           1        1       YFP           650.
##  6   1.67        9.83 WT    1           1        1       YFP           645.
##  7   2           9.83 WT    1           1        1       YFP           644.
##  8   2.33        9.83 WT    1           1        1       YFP           644.
##  9   2.67        9.83 WT    1           1        1       YFP           644.
## 10   3           9.83 WT    1           1        1       YFP           644.
## # … with 174,192 more rows, and 1 more variable: has_started <lgl>
d %>% all_data_plot()

YFP/CFP Ratio over time

The computed YFP/CFP ratio is the primary metric of interest. Data are presented up to 50 minutes, the data collection end point. The curves remain noisy but the initial peak followed by decay is observable.

ratio_added <- d %>% 
  filter(has_started == TRUE, minutes <= 50) %>%
  group_by(line, bio_rep, tech_rep, cell_number) %>% 
  ratio_curve(value = intensity )  

ratio_added %>% ratio_plot()

Wavelet Extraction of Signifcant Signal

In order to find the significant large signal structure within the noisy curve, we apply a wavelet reconstruction of the curves.

wavelet_reconstruction <- function(df, span = 100, re_scale = FALSE, val_col = "intensity"){
  bin <- capture.output(wave <- analyze.wavelet(df, val_col, loess.span = span, verbose = FALSE) )
  bin <- capture.output(r <- reconstruct(wave, plot.waves = FALSE, plot.rec = FALSE, rescale = re_scale) )
  
  tag <- paste0(val_col, ".r")
  return(data.frame(reconstructed_intensity = r$series[[tag]]) )
}



ratio_added %>% 
  filter(has_started == TRUE, minutes <= 50) %>%
  group_by(line, bio_rep, tech_rep, cell_number) %>%
  nest() %>%
  mutate(reconstructed = map(data, wavelet_reconstruction, val_col = "ratio" ) ) %>%
  unnest() %>%
  ratio_recons_plot()

Area Under Curve \(t \leq 5 \textrm{minutes}\)

Assuming that OSCA1.3 as a plasma membrane calcium channel activated by BIK1 is involved in the immediate calcium influx after flg22 perception. We therefore can look at the area under the wavelet curve (AUC) in the first five minutes.

get_auc <- function(df){
  
  steps <- 1:length(df$ratio)
  auc(steps, df$ratio, absolutearea = TRUE)
  
}

auc_data_short <- ratio_added %>% 
  filter(has_started == TRUE, minutes <= (start_time + 5) ) %>%
  group_by(line, bio_rep, tech_rep, cell_number) %>%
  nest() %>%
  mutate(aucurve = map(data, get_auc)) %>%
  unnest( .preserve = c(aucurve)) %>% unnest() %>% 
  group_by(line, bio_rep, tech_rep, cell_number) %>%
  summarise(n_auc = n_distinct(aucurve), act_auc = first(aucurve) )

auc_data_short
## # A tibble: 125 x 6
## # Groups:   line, bio_rep, tech_rep [?]
##    line        bio_rep tech_rep cell_number n_auc act_auc
##    <chr>       <chr>   <chr>    <chr>       <int>   <dbl>
##  1 osca1.3/1.7 1       1        1               1    86.5
##  2 osca1.3/1.7 1       1        2               1    90.7
##  3 osca1.3/1.7 1       1        3               1    85.9
##  4 osca1.3/1.7 1       1        4               1    75.0
##  5 osca1.3/1.7 1       1        5               1    91.1
##  6 osca1.3/1.7 1       1        6               1    98.5
##  7 osca1.3/1.7 1       2        1               1   113. 
##  8 osca1.3/1.7 1       2        2               1    97.3
##  9 osca1.3/1.7 1       2        3               1    92.4
## 10 osca1.3/1.7 1       2        4               1    86.8
## # … with 115 more rows
ggplot(auc_data_short) + aes(bio_rep, as.integer(act_auc) ) + geom_boxplot(aes(colour = line), notch = TRUE) + geom_jitter(aes(colour = line), position = position_dodge(width = 0.5)) 
## notch went outside hinges. Try setting notch=FALSE.
## notch went outside hinges. Try setting notch=FALSE.
## notch went outside hinges. Try setting notch=FALSE.
## notch went outside hinges. Try setting notch=FALSE.
## notch went outside hinges. Try setting notch=FALSE.

Significance Testing of AUC \(t \leq 5 \textrm{minutes}\).

A linear model of the AUC responding to genotype followed by ANOVA is used to test the null hypothesis that the difference between the mean AUC between genotypes is equal to 0.

library(nlme)
model <- lme(act_auc ~ line, random =~ 1|bio_rep, data = auc_data_short)
summary(model)
## Linear mixed-effects model fit by REML
##  Data: auc_data_short 
##        AIC      BIC    logLik
##   909.5038 920.7526 -450.7519
## 
## Random effects:
##  Formula: ~1 | bio_rep
##         (Intercept) Residual
## StdDev:    4.568105 8.890754
## 
## Fixed effects: act_auc ~ line 
##                Value Std.Error  DF  t-value p-value
## (Intercept) 86.53165  2.552796 120 33.89682  0.0000
## lineWT       4.94677  1.593509 120  3.10433  0.0024
##  Correlation: 
##        (Intr)
## lineWT -0.319
## 
## Standardized Within-Group Residuals:
##          Min           Q1          Med           Q3          Max 
## -1.903292957 -0.608880677  0.005725672  0.602274223  2.813395713 
## 
## Number of Observations: 125
## Number of Groups: 4
anova(model)
##             numDF denDF   F-value p-value
## (Intercept)     1   120 1355.4500  <.0001
## line            1   120    9.6368  0.0024

With ths model $ p < 0.05$, indicating that the difference between the mean AUCs of the genotypes is not likely to be 0. We therefore reject the null hypothesis.

Effect Size Between Genotypes

The size of the effect is computed by taking difference in means between AUC in the genotypes. We express as a percentage difference.

library(dplyr)
## 
## Attaching package: 'dplyr'
## The following object is masked from 'package:nlme':
## 
##     collapse
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
group_means <- auc_data_short %>% 
  group_by(line) %>%
  summarise(mean = mean(act_auc))
## Warning: Detecting old grouped_df format, replacing `vars` attribute by `groups`
group_means
## # A tibble: 2 x 2
##   line         mean
##   <chr>       <dbl>
## 1 osca1.3/1.7  86.7
## 2 WT           91.6

Percent change = 5.4049494 .

Final Image

A rendering of the AUC data in a single, print-friendly panel.

library(ggplot2)
library(forcats)
auc_data_short$line <- factor(auc_data_short$line, levels = rev(levels(as.factor(auc_data_short$line))))
auc_data_short
## # A tibble: 125 x 6
## # Groups:   line, bio_rep, tech_rep [24]
##    line        bio_rep tech_rep cell_number n_auc act_auc
##    <fct>       <chr>   <chr>    <chr>       <int>   <dbl>
##  1 osca1.3/1.7 1       1        1               1    86.5
##  2 osca1.3/1.7 1       1        2               1    90.7
##  3 osca1.3/1.7 1       1        3               1    85.9
##  4 osca1.3/1.7 1       1        4               1    75.0
##  5 osca1.3/1.7 1       1        5               1    91.1
##  6 osca1.3/1.7 1       1        6               1    98.5
##  7 osca1.3/1.7 1       2        1               1   113. 
##  8 osca1.3/1.7 1       2        2               1    97.3
##  9 osca1.3/1.7 1       2        3               1    92.4
## 10 osca1.3/1.7 1       2        4               1    86.8
## # … with 115 more rows
xlabels <- c(
  expression(paste("WT")), 
  expression(paste(italic("osca1.3/1.7")))
  )


ggplot(auc_data_short) + aes(line, as.integer(act_auc) ) + geom_boxplot(aes(colour = line), notch = TRUE) + geom_jitter(aes(colour = line, shape = bio_rep), position = position_dodge(width = 0.5))  +
  scale_x_discrete(labels = xlabels ) +
  theme_bw() + 
  labs(x = NULL, y = "Area Under Curve") + 
  theme(legend.position = "none") + 
  scale_colour_manual(values= c("black", "black")) + 
  theme(axis.title = element_text(family = "Helvetica", size = 14)) + 
  theme(axis.text.x = element_text(family = "Helvetica", size = 12, colour = "black"))

ggsave("cameleon.svg", width = 55, height = 100, units = "mm")
ggsave("cameleon.png", width = 55, height = 100, units = "mm")

readr::write_csv(auc_data_short, "graph_data.csv")